#wru L2
tx <- readRDS("temp/combined.rds") |> 
  mutate(county = str_pad(Voters_FIPS, width = 3, side = "left", pad = "0"))

cs <- filter(fips_codes, state == "TX") %>% 
  select(COUNTY_NAME = county,
         county = county_code) %>% 
  mutate(COUNTY_NAME = toupper(gsub(" County", "", COUNTY_NAME)))

tx <- left_join(tx, cs)

####MAIL BALLOT APPLCIATIONS ACCEPTED/REJECTED####

#rejections
rej_app <- read_excel("data/PIR 22-0331 - Batch 1/2-Individual-Mail-Application-Rejected.xlsx") %>% 
  filter(COUNTY_NAME != "TRAVIS") %>% 
  mutate(COUNTY_NAME = ifelse(COUNTY_NAME == "LASALLE", "LA SALLE", COUNTY_NAME))
#requests
req_app <- read_excel("data/PIR 22-0331 - Batch 1/1-Individual-Mail-Application-Requested.xlsx")

## individual counties

travis_rej <- read_excel("data/individual_counties_abbms/travisCANNOT_SHARE/064.127323 (CONFIDENTIAL) 2022 P22 App Rejections (FINAL).xlsx") %>% 
  mutate(COUNTY_NAME = "TRAVIS",
         VUID = as.numeric(VUID)) %>% 
  select(VUID, REJECT_REASON = `Rejection Reason`,
         COUNTY_NAME)

el_paso_rej <- read_excel("data/individual_counties_abbms/el_pasoCANNOT_SHARE/EPC004725.XLSX") %>% 
  mutate(COUNTY_NAME = "EL PASO",
         VUID = as.numeric(VUID)) %>% 
  select(VUID, REJECT_REASON = `REASON ABBM REJ`,
         COUNTY_NAME)

webb_rej <- read_excel("data/individual_counties_abbms/Webb - clean.xlsx") %>% 
  mutate(COUNTY_NAME = "WEBB",
         Voters_StateVoterID = as.numeric(Voters_StateVoterID)) %>% 
  select(VUID = Voters_StateVoterID, REJECT_REASON = rejection_reason,
         COUNTY_NAME)

rej_app <- bind_rows(rej_app, travis_rej, el_paso_rej, webb_rej)

cats <- fread("data/rejs_cats.csv") %>% 
  select(-n)

rej_app <- left_join(rej_app, cats) |> 
  mutate(V3 = ifelse(is.na(V3), "Other", V3))

rej_app <- rej_app %>% 
  select(-REJECT_REASON) %>% 
  rename(`Rejection Reason App` = V3)

bad_counties <- unique(tx$COUNTY_NAME)[!(unique(tx$COUNTY_NAME) %in% c(rej_app$COUNTY_NAME))]

pop <- get_acs("county", state = "TX", year = 2020, variables = "B03002_001") %>% 
  select(NAME, population = estimate) %>%
  filter(population >= 50000) %>%
  mutate(NAME = toupper(gsub(" County, Texas", "", NAME)))

bad_counties2 <- bad_counties[bad_counties %in% pop$NAME]

####MAIL BALLOT ACCEPTED/REJECTED####

#acceptances
acc <- read_excel("data/PIR 22-0331 - Batch 2/4-Individual-Mail-Ballot-Accepted.xlsx")
#rejection
rej <- read_excel("data/PIR 22-0331 - Batch 2/3-Individual-Mail-Ballot-Rejected.xlsx") %>% 
  filter(!(VUID %in% acc$VUID))

rej_app <- filter(rej_app, !(VUID %in% c(acc$VUID, rej$VUID)))

c2 <- fread("data/rej_bal_cat.csv")

rej <- left_join(rej, c2) %>% 
  select(-n, -REJECT_REASON) %>% 
  rename(`Rejection Reason` = V3) %>% 
  mutate(`Rejection Reason` = ifelse(is.na(`Rejection Reason`), "Other", `Rejection Reason`))

rejsb <- rej %>% 
  filter(!(COUNTY_NAME %in% bad_counties2)) |> 
  group_by(`Rejection Reason` = `Rejection Reason`) %>% 
  tally(name = "Ballots Rejected")

rejs_app_ll <- rej_app %>% 
  filter(!(COUNTY_NAME %in% bad_counties2)) |> 
  group_by(`Rejection Reason` = `Rejection Reason App`) %>% 
  tally(name = "Applications_Rejected")

rejs_ll <- full_join(rejs_app_ll, rejsb) |> 
  arrange(-`Ballots Rejected`, -`Applications_Rejected`) |> 
  mutate(across(c(`Ballots Rejected`, Applications_Rejected), ~ . / sum(., na.rm = T)))


rejs_ll <- bind_rows(rejs_ll,
                     rejs_ll |> 
                       summarize(across(c(`Ballots Rejected`, Applications_Rejected), ~sum(., na.rm = T))) |> 
                       mutate(`Rejection Reason` = "Total")) |> 
  mutate(`Rejection Reason` = ifelse(`Rejection Reason` == "Incorrect or Missing Number",
                                     "SB1 (Incorrect or Missing Number)", `Rejection Reason`),
         across(ends_with("jected"), ~ percent(., accuracy = .1)),
         `Ballots Rejected` = ifelse(is.na(`Ballots Rejected`), "", `Ballots Rejected`)) |> 
  rename(`Applications Rejected` = Applications_Rejected)

knitr::kable(rejs_ll, booktabs = T, caption = "\\label{tab:rejection-breakout} Rejections by Reason, 2022 Primary", linesep = "",
             longtable = T, align=c('l', rep('c', ncol(rejs_ll) - 1)), format = "latex") %>%
  kable_styling(font_size = 12,
                latex_options = c("HOLD_position", "repeat_header")) %>%
  row_spec(nrow(rejs_ll) - 1, hline_after = T) |> 
  save_kable(paste0("Output/Tables/rejs_ll.tex"))


tx <- tx %>% 
  mutate(vuid = as.numeric(vuid),
         app_submitted = vuid %in% c(req_app$VUID, rej_app$VUID, acc$VUID, rej$VUID),
         app_rejected_sb1 = vuid %in% c(filter(rej_app, `Rejection Reason App` == "Incorrect or Missing Number")$VUID),
         app_rejected_any = vuid %in% c(filter(rej_app)$VUID),
         ballot_rejected_sb1 = vuid %in% filter(rej, `Rejection Reason` == "Incorrect or Missing Number")$VUID,
         ballot_rejected_any = vuid %in% filter(rej)$VUID,
         mail_ballot_accepted = vuid %in% acc$VUID,
         ballot_submitted = vuid %in% c(acc$VUID, rej$VUID),
         pred.nw = pred.bla + pred.his + pred.asi + pred.oth,
         either_rej_sb1 = ballot_rejected_sb1 | app_rejected_sb1,
         bad_counties = COUNTY_NAME %in% bad_counties,
         bad_counties2 = COUNTY_NAME %in% bad_counties2)


tx <- filter(tx, app_submitted)

tx <- mutate(tx,
             Primary_2022_03_01 = ifelse(BallotType_Primary_2022_03_01 == "Absentee" & !mail_ballot_accepted,
                                         "", Primary_2022_03_01),
             BallotType_Primary_2022_03_01 = ifelse(BallotType_Primary_2022_03_01 == "Absentee" & !mail_ballot_accepted,
                                                    "", BallotType_Primary_2022_03_01))


rej <- rej |> 
  group_by(VUID) |> 
  filter(row_number() == 1) |> 
  select(vuid = VUID,
         `Rejection Reason`)

rej_app <- rej_app |> 
  group_by(VUID) |> 
  filter(row_number() == 1) |> 
  select(vuid = VUID,
         `Rejection Reason App`)

tx <- left_join(tx, rej)

tx <- left_join(tx, rej_app)

cleanup("tx")
############################
############################
############################

hist <- fread("C:/Users/Researcher 2/Desktop/VM2--TX--2022-05-07/VM2--TX--2022-05-07-VOTEHISTORY.tab",
              sep = "\t",
              select = c("LALVOTERID", 
                         "PRI_BLT_2010_03_02",
                         "PRI_BLT_2012_05_29",
                         "PRI_BLT_2014_03_04",
                         "PRI_BLT_2016_03_01",
                         "PRI_BLT_2018_03_06",
                         "PRI_BLT_2020_03_03"))

tx <- left_join(tx, hist) |> 
  mutate(sum_d = (PRI_BLT_2010_03_02 == "D") +
           (PRI_BLT_2012_05_29 == "D") +
           (PRI_BLT_2014_03_04 == "D") +
           (PRI_BLT_2016_03_01 == "D") +
           (PRI_BLT_2018_03_06 == "D") +
           (PRI_BLT_2020_03_03 == "D"),
         sum_r = (PRI_BLT_2010_03_02 == "R") +
           (PRI_BLT_2012_05_29 == "R") +
           (PRI_BLT_2014_03_04 == "R") +
           (PRI_BLT_2016_03_01 == "R") +
           (PRI_BLT_2018_03_06 == "R") +
           (PRI_BLT_2020_03_03 == "R"),
         party = ((-1*sum_d) + (sum_r)) / (sum_d + sum_r),
         party = ifelse(is.na(party), 0, party)) |> 
  select(-sum_d, -sum_r, -starts_with("PRI_BLT_2"))

tx <- select(tx, -General_2020_11_03)

income <- income <- get_acs(geography = "tract",
                            variables = c(medincome = "B19013_001"),
                            state = "TX",
                            year = 2022) %>%
  dplyr::select(-variable, -moe) %>%
  dplyr::rename(median_income = estimate)
unem <- unemployment <- get_acs(geography = "tract",
                                variables = c(lf = "B23025_003",
                                              unem = "B23025_005"),
                                output = "wide",
                                state = "TX",
                                year = 2022) %>%
  dplyr::mutate(unem = unemE / lfE) %>%
  dplyr::select(GEOID, NAME, unem)
educ <- get_acs(geography = "tract",
                variables = c("B15002_014",
                              "B15002_015",
                              "B15002_016",
                              "B15002_017",
                              "B15002_018",
                              
                              "B15002_031",
                              "B15002_032",
                              "B15002_033",
                              "B15002_034",
                              "B15002_035"),
                
                summary_var = "B15002_001",
                state = "TX", year = 2022) %>%
  dplyr::group_by(GEOID, NAME) %>%
  dplyr::summarize(college_grad = sum(estimate / summary_est))

tx$GEOID <- substring(tx$GEOID, 1, 11)

tx <- left_join(tx, left_join(income, left_join(unem, educ)))

l2 <- fread("C:/Users/Researcher 2/Desktop/VM2--TX--2022-05-07/VM2--TX--2022-05-07-DEMOGRAPHIC.tab",
            select = c("LALVOTERID", "CommercialData_EstimatedHHIncomeAmount",
                       "CommercialData_Education"))


tx <- left_join(tx, l2)

tx <- tx |> 
  mutate(income = as.integer(gsub("[$]", "", CommercialData_EstimatedHHIncomeAmount)),
         education = case_when(grepl("Less than HS Diploma", CommercialData_Education) ~ "lths",
                               grepl("HS Diploma", CommercialData_Education) ~ "hs",
                               grepl("Some College", CommercialData_Education) ~ "some college",
                               grepl("Bach Degree", CommercialData_Education) ~ "bachelors",
                               grepl("Grad Degree", CommercialData_Education) ~ "grad",
                               grepl("Vocational", CommercialData_Education) ~ "vocational",
                               T ~ "unknown")) |> 
  select(-starts_with("CommercialData"))


t1 <- read_xlsx("data/GENERAL_ELECTION/STATE182525.xlsx") |>
  filter(DT_APP_REJECTED > "2022-10-01")
t2 <- read_xlsx("data/GENERAL_ELECTION/STATE182526.xlsx") |>
  filter(DT_BALLOT_REJECTED > "2022-10-01")

tx$r <- tx$vuid %in% c(t1$VUID, t2$VUID)

mean(filter(tx, app_rejected_any)$vuid %in% c(t1$VUID, t2$VUID))
mean(filter(tx, ballot_rejected_any)$vuid %in% c(t1$VUID, t2$VUID))

############################
############################
############################
# BIRDiE


pop <- get_decennial("block", variables = c("total" = "P2_001N",
                                            "white" = "P2_005N",
                                            "black" = "P2_006N",
                                            "hisp" = "P2_002N",
                                            "asian" = "P2_008N",
                                            "aian" = "P2_007N"),
                     year = 2020, output = "wide",
                     state = "TX") |> 
  mutate(other = total - white - black - hisp - asian - aian) |> 
  select(-total, -NAME)

cvap <- cvap_get("TX", 2021, "state") |> 
  select(-GEOID) |> 
  pivot_longer(starts_with("cvap_")) |> 
  mutate(val = value / cvap,
         race = gsub("cvap_", "", name),
         race = ifelse(race %in% c("other", "nhpi", "two"), "other", race)) |> 
  group_by(race) |> 
  summarize(val = sum(val))

cvap_v <- cvap$val
names(cvap_v) <- cvap$race

out_p <- bisg_me(~ nm(surname) + GEOID, data = tx, p_r = cvap_v,
                 p_rgx = pop, cores = 4)
rm(pop)
k <- cbind(tx, out_p)

k <- k |> 
  filter(!is.na(pr_white))

k$rejected <- factor(k$app_rejected_any | k$ballot_rejected_any)

### FEED IN THE `RAW` BISG ESTIMATES FROM THE LAST RUN (`bisgme()`)

fit <- birdie(select(k, starts_with("pr_")), rejected ~ GEOID,
              data = k)

k <- k[, c(1)]

k <- cbind(k, fitted(fit))

k$pr_other <- k$pr_other + k$pr_aian

k <- select(k, -pr_aian)

tx <- left_join(tx, k, by = c("LALVOTERID" = "k"))

############################
############################
############################

## anonymize

tx <- select(tx, -starts_with("Residence_Addresses"), -Parties_Description,
             -GEOID, -state, -county, -tract, -block, -NAME, -vuid,
             -COUNTY_NAME, -surname)

## generate random ID

tx$so <- runif(nrow(tx))

tx <- arrange(tx, so) %>% 
  mutate(LALVOTERID = paste0("ID", row_number())) %>% 
  select(-so)

saveRDS(tx, "temp/tx_submitters.rds")

############################
############################
############################
cleanup()


